#!/bin/bash

# Pipeline-wide settings, exported so that every child process inherits them:
#   BASE_DATA_PATH - root of the data tree this script reads from and writes to
#   BASE_DATE      - date stamp used as a directory component of the data tree
#   LC_ALL         - forces the C locale for all child tools (notably the
#                    sort(1) calls later in this script)
export \
    BASE_DATA_PATH=/home/mixagol/data \
    BASE_DATE=20120802 \
    LC_ALL=C


# Prepare the aligner input for both search tools (BLAST first, then WU-BLAST).
# For each tool the filtered, sorted hit list from stage 2 is decompressed and
# piped into split_genomes.py, which receives the tool's input_files directory
# as its only argument (presumably it writes the split files there — confirm
# against the script).
#
# NOTE: the order of the list matters. CUR_DATA_DIR stays exported with the
# wu_blast path once the loop is done, and every later step of this script
# operates on that directory only.
#
# The helper script is referenced by a relative path, so this script has to be
# started from the directory that contains 3_raw_matrix_nw/.
for tool in blast wu_blast; do
    export CUR_DATA_DIR="${BASE_DATA_PATH}/3_raw_matrix_nw/${BASE_DATE}/${tool}"
    mkdir -p "${CUR_DATA_DIR}/input_files" || exit 1
    zcat "${BASE_DATA_PATH}/2_raw_matrix_find/${BASE_DATE}/${tool}/results_0.05_sorted.txt.gz" \
        | 3_raw_matrix_nw/split_genomes.py "${CUR_DATA_DIR}/input_files"
done


# Directory that later receives the alignment results copied back from the
# worker scratch area.
mkdir -p "${CUR_DATA_DIR}/output_files" || exit 1

# Build the batch aligner (statically linked, -O3). Stop right here when the
# build fails: continuing would run the rest of the pipeline against a stale
# or missing binary.
g++ -static -O3 3_raw_matrix_nw/nw_aligner_batch.cpp -o 3_raw_matrix_nw/nw_aligner_batch || {
    echo "error: failed to build 3_raw_matrix_nw/nw_aligner_batch" >&2
    exit 1
}

# Disabled: local single-process (xargs -P1) variant of the alignment step.
#ls $CUR_DATA_DIR/input_files | xargs -n1 -P1 -I {} \
#    bash -c "zcat $CUR_DATA_DIR/input_files/"{}" \
#        | 3_raw_matrix_nw/prepare_data.py -G $BASE_DATA_PATH/1_databases/${BASE_DATE}/genom.db -g $BASE_DATA_PATH/1_databases/${BASE_DATE}/gen.db \
#        | 3_raw_matrix_nw/nw_aligner_batch -i ${BASE_DATA_PATH}/3_raw_matrix_nw/simulation/pupy/interpolation.txt \
#        | gzip \
#        > $CUR_DATA_DIR/output_files/"{}

# Stage everything the MPI workers read or write under one scratch directory.
# Each staged directory is wiped first so that leftovers from a previous run
# cannot leak into this one.
#
# All paths are derived from work_dir. The absolute form is used throughout
# (instead of mixing it with ~/work/data) because the mpirun invocation below
# is given absolute /home/mixagol/work/data/... paths; relying on $HOME here
# would stage nw_input somewhere else whenever HOME is not /home/mixagol.
work_dir=/home/mixagol/work/data
db_src="${BASE_DATA_PATH}/1_databases/${BASE_DATE}"

rm -rf -- \
    "${work_dir:?}/sync_nw_workers" \
    "${work_dir:?}/1_databases" \
    "${work_dir:?}/nw_input" \
    "${work_dir:?}/nw_output"

# -p also creates work_dir itself when it does not exist yet.
mkdir -p -- \
    "${work_dir}/sync_nw_workers" \
    "${work_dir}/1_databases" \
    "${work_dir}/nw_output" || exit 1

# Databases used by the workers.
cp -- "${db_src}/gen.db" "${db_src}/genom.db" "${work_dir}/1_databases/" || exit 1

# nw_input was removed above, so cp -r creates it as a copy of input_files
# instead of nesting input_files inside an existing directory.
cp -r -- "${CUR_DATA_DIR}/input_files" "${work_dir}/nw_input" || exit 1

cp -- "${BASE_DATA_PATH}/3_raw_matrix_nw/simulation/pupy/interpolation.txt" \
    "${work_dir}/pupy_interpolation.txt" || exit 1

# Run the alignment workers: mpirun starts 20 processes (-np 20) of ~/runner,
# which is handed the worker script followed by the arguments listed below.
# NOTE(review): "-q quad" looks like a queue selection of the local mpirun
# wrapper, and the per-argument descriptions are inferred from the path names —
# confirm against run_nw_worker.sh.
nw_worker_args=(
    /home/mixagol/work/data/sync_nw_workers          # worker sync directory
    /home/mixagol/work/data/nw_input                 # staged input files
    /home/mixagol/work/data/nw_output                # where results are written
    /home/mixagol/work/data/1_databases              # gen.db / genom.db copies
    /home/mixagol/work/data/pupy_interpolation.txt   # interpolation table
)

mpirun -q quad -np 20 ~/runner \
    /home/mixagol/trunk/gene-class/3_raw_matrix_nw/run_nw_worker.sh \
    "${nw_worker_args[@]}"

# Bring the worker results back into the data tree.
# NOTE: output_files already exists at this point (it is created earlier in
# this script), so cp -r places the results in output_files/nw_output/ rather
# than directly in output_files/. The merge step still picks them up because
# it searches output_files recursively.
# Abort on failure: otherwise the merge would silently run on stale or missing
# data and overwrite results.txt.gz.
cp -r /home/mixagol/work/data/nw_output/ "${CUR_DATA_DIR}/output_files" || exit 1

# Merge all alignment result files into a single gzip-compressed table.
#   1. List every regular file below output_files in sorted order.
#   2. Per file: order the tab-separated rows by columns 1-2 and, within equal
#      keys, by column 3 numerically descending (stable sort); then keep a
#      single row per (column 1, column 2) pair — intended to be the row with
#      the largest column 3.
#   3. Sort the concatenated rows by column 2, using a 2G buffer and
#      CUR_DATA_DIR for sort's temporary files.
#
# File names are handed to zcat as data by a read loop instead of being
# spliced into a `bash -c` command string, so names containing spaces, quotes
# or shell metacharacters cannot break or alter the command.
find "${CUR_DATA_DIR}/output_files" -type f \
    | sort \
    | while IFS= read -r result_file; do
          zcat "${result_file}" \
              | sort -t$'\t' -k1,2 -s -k3,3nr \
              | sort -t$'\t' -k1,2 -u
      done \
    | sort -t$'\t' -S2G -T "${CUR_DATA_DIR}/" -k2,2 \
    | gzip \
    > "${CUR_DATA_DIR}/results.txt.gz"

# Derive the filtered table: keep only the rows of results.txt.gz whose third
# tab-separated column compares >= 10 in awk.
# Note: the output is named "gt10", but a value of exactly 10 passes as well.
zcat "${CUR_DATA_DIR}/results.txt.gz" \
    | awk -F'\t' '$3 >= 10' \
    | gzip \
    > "${CUR_DATA_DIR}/results_gt10.txt.gz"



